Call Libraries
library(tidyverse)
library(caret)
library(MASS)
library(car)
library(moments)
Calling the Transformed Datasets
# Load the cleaned income-group table (columns: Name, Group, Year, Num, Amount, Avg).
income_cleaned <- read_csv("NYS_Corp_Tax_Credit_data/income_cleaned.csv")
Rows: 1921 Columns: 6── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (2): Name, Group
dbl (4): Year, Num, Amount, Avg
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the income table, then load the cleaned industry-group table
# (same six columns: Name, Group, Year, Num, Amount, Avg).
income_cleaned
industry_cleaned <- read_csv("NYS_Corp_Tax_Credit_data/industry_cleaned.csv")
Rows: 2476 Columns: 6── Column specification ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (2): Name, Group
dbl (4): Year, Num, Amount, Avg
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Display the loaded industry table.
industry_cleaned
Creating the Models
# Fit a linear model on the income table using `sat.formula` (defined outside this view).
# NOTE(review): the error recorded below shows `sat.formula` references `Avg.bc`,
# which is not a column of `income_cleaned` (Name, Group, Year, Num, Amount, Avg).
# Presumably the model should be fit on the Box-Cox-transformed table instead — confirm.
income.model <- lm(sat.formula, data = income_cleaned)
Error in eval(predvars, data, env) : object 'Avg.bc' not found
# linear.model.cleaned2 <- lm(Avg ~ . - Amount, data = income_cleaned)
# s = summary(linear.model.cleaned)
# show(s)
# plot(linear.model.cleaned)
#
# #histograms of response variable to check distribution
# hist(income_cleaned$Avg)
# hist(income_cleaned_bc$Avg.bc)
#
# #Shapiro-Wilk test to evaluate normality
# shapiro.test(income_cleaned$Avg)
# shapiro.test(income_cleaned_bc$Avg.bc)
#
# #Kurtosis evaluation (normal distribution has a value close to 3)
# moments::kurtosis(income_cleaned$Avg)
# moments::kurtosis(income_cleaned_bc$Avg.bc) #evaluated to 2.75, which is close to 3, so the distribution is approximately normal with possibly slightly fewer outliers.
Correcting violation of Normality in previous model with BoxCox transform
Checking linear regression assumptions for the transformed data.
# Industry dataset: run the model-summary helper on the transformed industry table.
# `sat.model.summary`, `sat.field` and `sat.formula` are defined outside this view;
# the output recorded below shows a Shapiro-Wilk test, a scalar statistic,
# an lm summary and a GVIF table.
sat.model.summary(industry_cleaned_bc, sat.field, sat.formula)
Shapiro-Wilk normality test
data: df[[field]]
W = 0.9902, p-value = 5.826e-12
[1] 2.513097
Call:
lm(formula = sat.formula, data = df)
Residuals:
Min 1Q Median 3Q Max
-6.2890 -0.4698 0.0171 0.4942 3.9076
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -6.143e+01 1.110e+01 -5.532 3.51e-08 ***
Year 3.464e-02 5.504e-03 6.294 3.67e-10 ***
NameAlternative Fuels and Electric Vehicle Recharging Property Credit 2.667e-01 2.734e-01 0.976 0.329351
NameAlternative Minimum Tax Credit -2.461e+00 2.273e-01 -10.827 < 2e-16 ***
NameBeer Production Credit 6.344e-01 3.381e-01 1.876 0.060762 .
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 6/23/08 but before 7/1/15 2.199e+00 2.416e-01 9.101 < 2e-16 ***
NameBrownfield Tax Credits - Redevelopment Tax Credit - On or after 7/1/15 2.726e+00 3.930e-01 6.937 5.13e-12 ***
NameBrownfield Tax Credits - Redevelopment Tax Credit - Prior to 6/23/08 1.587e+00 2.581e-01 6.149 9.11e-10 ***
NameBrownfield Tax Credits - Remediation Real Property Tax Credit 9.698e-01 2.571e-01 3.773 0.000165 ***
NameClean Heating Fuel Credit -2.544e+00 2.482e-01 -10.250 < 2e-16 ***
NameConservation Easement Tax Credit -1.122e+00 2.538e-01 -4.421 1.03e-05 ***
NameCredit for Employment of Persons with Disabilities -1.244e+00 2.738e-01 -4.546 5.74e-06 ***
NameCredit for Purchase of an Automated External Defibrillator -1.535e+00 2.411e-01 -6.364 2.34e-10 ***
NameCredit for Taxicabs & Livery Service Vehicles Accessible to Persons with Disabilities -8.142e-02 4.475e-01 -0.182 0.855647
NameEmpire State Apprentice Tax Credit -8.684e-01 5.464e-01 -1.589 0.112126
NameEmpire State Commercial Production Credit 6.241e-01 3.333e-01 1.872 0.061269 .
NameEmpire State Film Post Production Credit 1.439e+00 2.821e-01 5.100 3.65e-07 ***
NameEmpire State Film Production Credit 3.233e+00 2.584e-01 12.509 < 2e-16 ***
NameEmpire State Musical and Theatrical Production Credit 1.012e+00 4.866e-01 2.080 0.037610 *
NameExcelsior Jobs Program Credit 1.644e+00 2.372e-01 6.930 5.39e-12 ***
NameEZ/QEZE Tax Credits - EZ Investment Tax Credit 1.310e+00 2.332e-01 5.619 2.14e-08 ***
NameEZ/QEZE Tax Credits - EZ Wage Tax Credit 7.150e-01 2.275e-01 3.142 0.001696 **
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes 1.708e+00 2.264e-01 7.542 6.53e-14 ***
NameEZ/QEZE Tax Credits - QEZE Credit for Real Property Taxes For Corporate Partners 9.784e-01 2.328e-01 4.202 2.74e-05 ***
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit 1.616e-01 2.304e-01 0.701 0.483154
NameEZ/QEZE Tax Credits - QEZE Tax Reduction Credit For Corporate Partners -5.234e-01 2.723e-01 -1.923 0.054647 .
NameFarm Workforce Retention Credit -8.624e-01 2.890e-01 -2.984 0.002876 **
NameFarmers' School Tax Credit -1.158e+00 2.772e-01 -4.176 3.07e-05 ***
NameHire a Veteran Credit -1.040e+00 4.457e-01 -2.333 0.019731 *
NameHistoric Properties Rehabilitation Credit 2.398e+00 2.614e-01 9.172 < 2e-16 ***
NameInvestment Tax Credit 6.997e-01 2.230e-01 3.138 0.001724 **
NameInvestment Tax Credit for the Financial Services Industry 1.561e+00 3.109e-01 5.020 5.53e-07 ***
NameLife Sciences Research & Development Tax Credit 9.053e-01 4.864e-01 1.861 0.062867 .
NameLong-Term Care Insurance Credit -1.866e+00 2.268e-01 -8.228 3.08e-16 ***
NameLow-Income Housing Credit 1.331e+00 2.967e-01 4.486 7.58e-06 ***
NameManufacturer\u0092s Real Property Tax Credit -6.077e-01 2.528e-01 -2.404 0.016286 *
NameManufacturer�s Real Property Tax Credit -7.363e-01 2.758e-01 -2.669 0.007652 **
NameMinimum Wage Reimbursement Credit -9.159e-01 2.366e-01 -3.871 0.000111 ***
NameMortgage Servicing Tax Credit 8.932e-01 3.490e-01 2.560 0.010540 *
NameNew York Youth Jobs Program Tax Credit 1.266e-01 2.307e-01 0.549 0.583231
NameQETC Capital Tax Credit 1.163e+00 3.169e-01 3.670 0.000248 ***
NameQETC Employment Credit -3.045e-01 2.412e-01 -1.263 0.206769
NameQETC Facilities, Operations, and Training Credit 1.148e+00 3.624e-01 3.168 0.001554 **
NameSpecial Additional Mortgage Recording Tax Credit 7.509e-01 2.394e-01 3.137 0.001728 **
NameSTART-UP NY Tax Elimination Credit -1.834e+00 2.565e-01 -7.150 1.15e-12 ***
GroupAdministrative and Support and Waste Management and Remediation Services 2.884e-01 1.360e-01 2.120 0.034078 *
GroupAdministrative/Support/Waste Management/Remediation Services 1.680e-01 1.511e-01 1.112 0.266251
GroupAgriculture, Forestry, Fishing and Hunting -1.135e-01 1.258e-01 -0.902 0.367038
GroupArts, Entertainment, and Recreation 5.595e-01 1.251e-01 4.472 8.11e-06 ***
GroupConstruction -4.326e-02 1.172e-01 -0.369 0.712181
GroupEducational Services 2.253e-01 1.592e-01 1.415 0.157097
GroupFinance and Insurance 5.350e-01 1.099e-01 4.870 1.19e-06 ***
GroupHealth Care and Social Assistance -6.769e-02 1.237e-01 -0.547 0.584224
GroupInformation 7.096e-01 1.176e-01 6.034 1.84e-09 ***
GroupManagement of Companies and Enterprises 6.670e-01 1.053e-01 6.337 2.79e-10 ***
GroupManufacturing 4.604e-01 1.095e-01 4.204 2.72e-05 ***
GroupMining 2.521e-01 1.940e-01 1.299 0.194080
GroupMining, Quarrying, and Oil and Gas Extraction 3.858e-01 1.626e-01 2.372 0.017774 *
GroupOther Services (except Public Administration) -1.549e-01 1.197e-01 -1.293 0.195989
GroupProfessional, Scientific, and Technical Services 4.877e-01 1.126e-01 4.332 1.54e-05 ***
GroupReal Estate and Rental and Leasing 1.464e-01 1.104e-01 1.326 0.184896
GroupRetail Trade 3.752e-01 1.100e-01 3.411 0.000657 ***
GroupTransportation and Warehousing 2.125e-01 1.242e-01 1.711 0.087224 .
GroupUtilities 6.979e-01 1.422e-01 4.908 9.83e-07 ***
GroupWholesale Trade 4.364e-01 1.124e-01 3.882 0.000106 ***
Num 4.555e-04 2.307e-04 1.974 0.048459 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.8704 on 2410 degrees of freedom
Multiple R-squared: 0.7632, Adjusted R-squared: 0.7568
F-statistic: 119.5 on 65 and 2410 DF, p-value: < 2.2e-16
GVIF Df GVIF^(1/(2*Df))
Year 2.202893 1 1.484214
Name 5.218180 43 1.019397
Group 2.726612 20 1.025394
Num 1.371056 1 1.170921
Visualization and model stats
Stepwise Regression on Income_cat_bc (boxcox transformed dataset)
# Build a dummy-coded table for stepwise regression.
#
# Expands every predictor in `df` into model-matrix columns (categorical
# predictors become indicator columns, the intercept column is dropped),
# re-attaches the response `Avg.bc`, and replaces characters in the column
# names that are awkward inside formulas ("-", "'", "/", " ", "," and the
# replacement character) with underscores.
#
# df: data frame holding the response column `Avg.bc` plus the predictors.
# Returns: a base data.frame with one column per model-matrix term and
#          `Avg.bc` as the last column.
dummy_func <- function(df) {
  # Build the model frame once so the design matrix and the response come
  # from the same rows; rows with missing values are dropped from both
  # instead of leaving the response longer than the design matrix.
  model_frame <- model.frame(Avg.bc ~ ., data = df)
  design <- model.matrix(attr(model_frame, "terms"), data = model_frame)

  # drop = FALSE keeps a matrix (and its column name) even when only a
  # single predictor column remains after removing the intercept.
  dummy_bc <- as.data.frame(design[, -1, drop = FALSE])
  dummy_bc$Avg.bc <- unname(model.response(model_frame))

  colnames(dummy_bc) <- str_replace_all(colnames(dummy_bc), "-|'|/| |,|�", "_")
  dummy_bc
}
Cleaning column names further so stepwise regression doesn’t present any errors
# Income group dataset: dummy-code the transformed table, overwrite the name
# of column 37 with a formula-safe spelling, then list all column names.
income.dummy.bc <- dummy_func(income_cleaned_bc)
names(income.dummy.bc)[37] <- "NameManufactureru0092s_Real_Property_Tax_Credit"
names(income.dummy.bc)
# Industry group dataset: same steps; here the renamed column is number 35.
industry.dummy.bc <- dummy_func(industry_cleaned_bc)
names(industry.dummy.bc)[35] <- "NameManufactureru0092s_Real_Property_Tax_Credit"
names(industry.dummy.bc)
Stepwise regression using BIC as the criterion (the penalty k = log(n)).
# Forward and backward stepwise selection with a BIC penalty, run once per
# dummy-coded dataset. Results are stored in `forwardBIC` / `backwardBIC`,
# keyed by dataset name.
bcs <- list(income = income.dummy.bc, industry = industry.dummy.bc)
# Interactive check of the industry table.
bcs[["industry"]]
# Dataset keys to iterate over.
k <- c("income", "industry")
forwardBIC <- list(income = NULL, industry = NULL)
forwardBIC[["income"]]
backwardBIC <- list(income = NULL, industry = NULL)
for (i in k) {
  bc <- bcs[[i]]
  model.empty <- lm(Avg.bc ~ 1, data = bc) # intercept only
  model.full <- lm(Avg.bc ~ ., data = bc) # all variables
  # formula() on the fitted full model returns the expanded term list, so the
  # upper scope names every candidate predictor explicitly.
  scope <- list(lower = formula(model.empty), upper = formula(model.full))
  # Row count as a plain scalar. `count() %>% first()` yields a one-row data
  # frame under dplyr >= 1.1, which would make the penalty non-numeric.
  n_obs <- nrow(bc)
  # k = log(n) turns step()'s AIC-style criterion into BIC.
  forwardBIC[[i]] <- step(model.empty, scope, direction = "forward", k = log(n_obs))
  backwardBIC[[i]] <- step(model.full, scope, direction = "backward", k = log(n_obs))
}
Selecting Best Formula per Dataset from Stepwise Regressions
# Print diagnostics for a fitted stepwise model: the coefficient table
# ordered by term name, the full model summary, and the variance inflation
# factors.
#
# BIC.model: a fitted model accepted by summary() and vif().
# Returns: the vif() result, invisibly (the value of the final print()).
bic_func <- function(BIC.model) {
  model_summary <- summary(BIC.model)
  coef_table <- data.frame(model_summary$coefficients)
  sorted_coefs <- arrange(coef_table, rownames(coef_table))
  print(sorted_coefs)
  print(model_summary)
  print(vif(BIC.model))
}
# Print diagnostics for both stepwise fits on the industry dataset.
bic_func(backwardBIC$industry)
bic_func(forwardBIC$industry)
# Income group dataset: formula taken from the backward-selected model's call.
income.best.formula <- backwardBIC$income$call[[2]]
income.best.formula
# Industry group dataset: formula taken from the forward-selected model's call.
industry.best.formula <- forwardBIC$industry$call[[2]]
industry.best.formula
Stepwise regression using BIC as the criterion (the penalty k = log(n)).
# Check the variance inflation factors of the selected income model's predictors.
# `backwardBIC` is a list of fitted models keyed by dataset name, so the model
# is indexed explicitly; vif() cannot be applied to the list itself.
vif(backwardBIC[["income"]])
Year
1.661856
NameAlternative_Fuels_and_Electric_Vehicle_Recharging_Property_Credit
1.040066
NameAlternative_Minimum_Tax_Credit
1.355727
NameBrownfield_Tax_Credits___Redevelopment_Tax_Credit___On_or_after_6_23_08_but_before_7_1_15
1.077139
NameBrownfield_Tax_Credits___Redevelopment_Tax_Credit___On_or_after_7_1_15
1.032409
NameBrownfield_Tax_Credits___Redevelopment_Tax_Credit___Prior_to_6_23_08
1.076724
NameClean_Heating_Fuel_Credit
1.086361
NameConservation_Easement_Tax_Credit
1.071241
NameCredit_for_Employment_of_Persons_with_Disabilities
1.050364
NameCredit_for_Purchase_of_an_Automated_External_Defibrillator
1.084252
NameEmpire_State_Apprentice_Tax_Credit
1.028286
NameEmpire_State_Film_Post_Production_Credit
1.067360
NameEmpire_State_Film_Production_Credit
1.079378
NameEZ_QEZE_Tax_Credits___EZ_Investment_Tax_Credit
1.203345
NameEZ_QEZE_Tax_Credits___QEZE_Credit_for_Real_Property_Taxes
1.110401
NameEZ_QEZE_Tax_Credits___QEZE_Tax_Reduction_Credit
1.094191
NameEZ_QEZE_Tax_Credits___QEZE_Tax_Reduction_Credit_For_Corporate_Partners
1.059267
NameFarm_Workforce_Retention_Credit
1.043336
NameFarmers__School_Tax_Credit
1.075009
NameHire_a_Veteran_Credit
1.016637
NameHistoric_Properties_Rehabilitation_Credit
1.066991
NameIndustrial_or_Manufacturing_Business_Tax_Credit
1.180919
NameLong_Term_Care_Insurance_Credit
1.085593
NameLow_Income_Housing_Credit
1.059690
NameManufactureru0092s_Real_Property_Tax_Credit
1.059324
NameManufacturer_s_Real_Property_Tax_Credit
1.051337
NameMinimum_Wage_Reimbursement_Credit
1.074669
NameMortgage_Servicing_Tax_Credit
1.068997
NameNew_York_Youth_Jobs_Program_Tax_Credit
1.109900
NameQETC_Employment_Credit
1.072612
NameSpecial_Additional_Mortgage_Recording_Tax_Credit
1.114775
NameSTART_UP_NY_Tax_Elimination_Credit
1.055977
ENI1_000_000___24_999_999
1.850053
ENI100_000___499_999
1.795816
ENI100_000_000___499_999_999
1.691776
ENI25_000_000___49_999_999
1.578815
ENI50_000_000___99_999_999
1.576866
ENI500_000___999_999
1.708105
ENI500_000_000___and_over
1.744570
ENIZero_or_Net_Loss
1.867768
Num
1.533625
Splitting data up into test data and training data (test data is for year 2019, training is the rest)
# Inspect the dimensions of the income train/test split (created outside this view).
# NOTE(review): the recorded result is NULL, which is what dim() returns for an
# object without a dim attribute (e.g. a plain list) — confirm what
# `income.data.split` holds.
dim(income.data.split)
NULL
Lasso regression for comparison to backward stepwise
# For each dataset key in `k`: fit a lasso path over a fixed lambda grid, pick
# lambda by 10-fold cross-validation, print the coefficients at that lambda,
# and compare the lasso's test-set MSE with that of a linear model fit on all
# columns of a second split. `xy.splits`, `xy.sat.splits`, `test_train_split`
# and `calc_MSE` are defined outside this view.
# The objects assigned inside the loop are plain globals, so after the loop
# they hold the values from the last iteration.
for (i in k){
# pull the train/test design matrices and responses for this dataset
X.train <- xy.splits[[i]][['X.train']]
y.train <- xy.splits[[i]][['y.train']]
X.test <- xy.splits[[i]][['X.test']]
y.test <- xy.splits[[i]][['y.test']]
#create lambda grid: 100 log-spaced values from 10^2 down to 10^-5
lambda.grid = 10^seq(2, -5, length = 100)
#create lasso models with lambda.grid (alpha = 1 selects the lasso penalty)
lasso.models = glmnet(X.train, y.train, alpha = 1, lambda = lambda.grid)
#visualize coefficient shrinkage
plot(lasso.models, xvar = "lambda", label = TRUE, main = paste("Lasso Regression:", i))
#Cross Validation to find best lambda; the seed is fixed before the 10-fold CV call
set.seed(0)
cv.lasso.models <- cv.glmnet(X.train, y.train, alpha = 1, lambda = lambda.grid, nfolds = 10)
#visualize cross validation for lambda that minimizes the mean squared error.
plot(cv.lasso.models, main = paste("Lasso Regression:", i))
#Checking the best lambda
# NOTE(review): inside a for loop this bare expression is not auto-printed, so it has no visible effect.
log(cv.lasso.models$lambda.min)
best.lambda <- cv.lasso.models$lambda.min
print(paste(i, ' best.lambda:', best.lambda))
# NOTE(review): the two values below appear to come from an earlier run; the output recorded
# after this loop reports different best lambdas for income and industry.
# best lambda with all the variables was found to be 0.0006892612
# best lambda with only the bwdBIC coefficients included was found to be 0.0003053856
#looking at the lasso coefficients for the best.lambda
best.lambda.coeff <- predict(lasso.models, s = best.lambda, type = "coefficients")
print(best.lambda.coeff)
#using the lasso path at best.lambda to make predictions for the testing data.
# NOTE(review): despite the "train" in its name, this holds predictions for X.test.
lasso.best.lambda.train.pred <- predict(lasso.models, s = best.lambda, newx = X.test)
# NOTE(review): bare expression inside a for loop — not auto-printed.
lasso.best.lambda.train.pred
#checking MSE of the lasso predictions against the held-out response
MSE.lasso <- mean((lasso.best.lambda.train.pred - y.test)^2)
# comparison model: linear model on every column of the second split, scored on its test part
sat.data.split <- test_train_split(xy.sat.splits[[i]], Avg.bc ~ .)
sat.model.bc <- lm(Avg.bc ~., data = as.data.frame(cbind(sat.data.split[['X.train']], sat.data.split[['y.train']])))
MSE.sat <- calc_MSE(sat.model.bc, as.data.frame(sat.data.split[['X.test']]), sat.data.split[['y.test']])
print(paste(i, ' Lasso MSE: ', MSE.lasso, ' Saturated MSE: ', MSE.sat))
}
[1] "income best.lambda: 0.000359381366380463"
42 x 1 sparse Matrix of class "dgCMatrix"
s1
(Intercept) -17.284028557
Year 0.013476737
NameAlternative_Fuels_and_Electric_Vehicle_Recharging_Property_Credit -1.311613095
NameAlternative_Minimum_Tax_Credit -2.231733278
NameBrownfield_Tax_Credits___Redevelopment_Tax_Credit___On_or_after_6_23_08_but_before_7_1_15 1.563930684
NameBrownfield_Tax_Credits___Redevelopment_Tax_Credit___On_or_after_7_1_15 2.495348478
NameBrownfield_Tax_Credits___Redevelopment_Tax_Credit___Prior_to_6_23_08 1.252395863
NameClean_Heating_Fuel_Credit -3.196327520
NameConservation_Easement_Tax_Credit -2.276656433
NameCredit_for_Employment_of_Persons_with_Disabilities -3.290414339
NameCredit_for_Purchase_of_an_Automated_External_Defibrillator -3.159453144
NameEmpire_State_Apprentice_Tax_Credit -3.376054527
NameEmpire_State_Film_Post_Production_Credit 0.932428124
NameEmpire_State_Film_Production_Credit 2.721680583
NameEZ_QEZE_Tax_Credits___EZ_Investment_Tax_Credit 0.574494696
NameEZ_QEZE_Tax_Credits___QEZE_Credit_for_Real_Property_Taxes 0.960915301
NameEZ_QEZE_Tax_Credits___QEZE_Tax_Reduction_Credit -1.075189828
NameEZ_QEZE_Tax_Credits___QEZE_Tax_Reduction_Credit_For_Corporate_Partners -1.501009194
NameFarm_Workforce_Retention_Credit -1.861878308
NameFarmers__School_Tax_Credit -1.462208940
NameHire_a_Veteran_Credit -3.075132150
NameHistoric_Properties_Rehabilitation_Credit 1.795906161
NameIndustrial_or_Manufacturing_Business_Tax_Credit -1.970508839
NameLong_Term_Care_Insurance_Credit -2.991508779
NameLow_Income_Housing_Credit -1.276270928
NameManufactureru0092s_Real_Property_Tax_Credit -1.538480115
NameManufacturer_s_Real_Property_Tax_Credit -1.815450711
NameMinimum_Wage_Reimbursement_Credit -1.376381572
NameMortgage_Servicing_Tax_Credit -0.965444029
NameNew_York_Youth_Jobs_Program_Tax_Credit -1.590640664
NameQETC_Employment_Credit -0.853986295
NameSpecial_Additional_Mortgage_Recording_Tax_Credit -0.372417089
NameSTART_UP_NY_Tax_Elimination_Credit -2.300657711
Group1_000_000___24_999_999 1.151959932
Group100_000___499_999 0.353836083
Group100_000_000___499_999_999 1.726777151
Group25_000_000___49_999_999 1.377240385
Group50_000_000___99_999_999 1.526670338
Group500_000___999_999 0.591982587
Group500_000_000___and_over 2.412492613
GroupZero_or_Net_Loss 1.042816832
Num -0.001613791
[1] "income Lasso MSE: 1.45647173028653 Saturated MSE: 1.47368550172763"
[1] "industry best.lambda: 0.000114975699539774"
43 x 1 sparse Matrix of class "dgCMatrix"
s1
(Intercept) -61.87390882
NameAlternative_Minimum_Tax_Credit -3.17738814
NameLong_Term_Care_Insurance_Credit -2.61687663
NameClean_Heating_Fuel_Credit -3.20715109
NameEmpire_State_Film_Production_Credit 2.50121671
NameCredit_for_Purchase_of_an_Automated_External_Defibrillator -2.32395331
NameSTART_UP_NY_Tax_Elimination_Credit -2.73114539
NameMinimum_Wage_Reimbursement_Credit -1.67875085
NameEZ_QEZE_Tax_Credits___QEZE_Credit_for_Real_Property_Taxes 0.91834251
NameBrownfield_Tax_Credits___Redevelopment_Tax_Credit___On_or_after_6_23_08_but_before_7_1_15 1.63074712
NameHistoric_Properties_Rehabilitation_Credit 1.70377612
NameExcelsior_Jobs_Program_Credit 0.98122860
NameConservation_Easement_Tax_Credit -1.84268727
NameCredit_for_Employment_of_Persons_with_Disabilities -2.02070562
NameFarmers__School_Tax_Credit -1.76586266
NameEZ_QEZE_Tax_Credits___EZ_Investment_Tax_Credit 0.54112912
NameBrownfield_Tax_Credits___Redevelopment_Tax_Credit___Prior_to_6_23_08 0.78546985
NameManufactureru0092s_Real_Property_Tax_Credit -1.32671371
NameBrownfield_Tax_Credits___Redevelopment_Tax_Credit___On_or_after_7_1_15 2.14607941
`GroupOther_Services_(except_Public_Administration)` -0.39320233
NameEmpire_State_Film_Post_Production_Credit 0.73195196
GroupManagement_of_Companies_and_Enterprises 0.47699822
NameFarm_Workforce_Retention_Credit -1.68344698
NameManufacturer_s_Real_Property_Tax_Credit -1.55252782
NameQETC_Employment_Credit -1.06268465
NameEZ_QEZE_Tax_Credits___QEZE_Tax_Reduction_Credit_For_Corporate_Partners -1.31500410
Year 0.03534486
NameNew_York_Youth_Jobs_Program_Tax_Credit -0.75302790
NameEZ_QEZE_Tax_Credits___QEZE_Tax_Reduction_Credit -0.63972813
GroupAgriculture__Forestry__Fishing_and_Hunting -0.34353275
GroupConstruction -0.29738810
GroupHealth_Care_and_Social_Assistance -0.29780385
NameHire_a_Veteran_Credit -1.74532218
GroupInformation 0.47997729
NameInvestment_Tax_Credit_for_the_Financial_Services_Industry 0.77828597
GroupFinance_and_Insurance 0.33842083
GroupUtilities 0.48589130
NameEmpire_State_Apprentice_Tax_Credit -2.52886106
NameAlternative_Fuels_and_Electric_Vehicle_Recharging_Property_Credit -0.62586141
GroupManufacturing 0.29977977
GroupProfessional__Scientific__and_Technical_Services 0.27403574
GroupArts__Entertainment__and_Recreation 0.36963834
GroupWholesale_Trade 0.23629280
[1] "industry Lasso MSE: 0.787120640328352 Saturated MSE: 0.757030931332998"
1.47368550172763
# Add a column that applies (pred * lambda.bc + 1)^(1 / lambda.bc) to the lasso
# predictions — presumably inverting the Box-Cox transform to get back to dollars.
# NOTE(review): `lasso.best.lambda.train.pred` is whatever the preceding loop left
# behind (its last iteration), and `lambda.bc` is defined outside this view —
# confirm both belong to the same dataset.
as.data.frame(lasso.best.lambda.train.pred) %>% mutate(Avg_in_dollars = (lasso.best.lambda.train.pred*lambda.bc+1)^(1/lambda.bc))
# Recompute the MSE of `sat.model.bc` on the income test split.
# NOTE(review): after the loop `sat.model.bc` is the model from its last iteration;
# verify it is the income model before scoring it on `income.sat.data.split`.
calc_MSE(sat.model.bc, as.data.frame(income.sat.data.split[['X.test']]), income.sat.data.split[['y.test']])
[1] 1.473686